# -- Reset the session: objects first, then attached packages ---------
# Remove every object (hidden, dot-prefixed names included) from the
# environment this script runs in.
rm(list = ls(all.names = TRUE))
# Detach and unload every package listed under sessionInfo()$otherPkgs.
if (!is.null(sessionInfo()$otherPkgs)) {
  lapply(
    paste0("package:", names(sessionInfo()$otherPkgs)),
    detach,
    character.only = TRUE,
    unload = TRUE
  )
}
# -- Load packages ---------------------------------------
library(tidyverse)
library(gridExtra)
library(janitor)
library(knitr)
library(GGally)
library(MultinomialCI)
library(htmlTable)
library(Hmisc)
library(formattable)
library(rms)
library(MultNonParam)
library(inspectdf)
library(DataExplorer)
library(leaflet)
library(ggmap)
# Register the Google Maps API key with ggmap. The key is read from the
# GGMAP_GOOGLE_API_KEY environment variable (e.g. set in ~/.Renviron) so the
# secret is never committed with the script.
# NOTE(review): the key that used to be hard-coded on this line has been
# exposed in source and should be revoked/rotated in the Google Cloud console.
if (nzchar(Sys.getenv("GGMAP_GOOGLE_API_KEY"))) {
  ggmap::register_google(key = Sys.getenv("GGMAP_GOOGLE_API_KEY"))
} else {
  warning(
    "GGMAP_GOOGLE_API_KEY is not set; Google-backed ggmap calls will fail.",
    call. = FALSE
  )
}
library(htmltools)
library(colorspace)
library(readr)
# -- Load the listings and derive the analysis columns ----------------
fp <- read_csv("AB_NYC_2019.csv")

# Treat identifier and label columns as categorical (factor) variables.
# NOTE(review): levels are fixed here, before any rows are filtered out, so
# levels with no remaining rows persist (zero-count neighbourhoods show up in
# the cross-tabs further down). Consider droplevels() if that is unintended.
fp <- local({
  categorical <- c(
    "id", "name", "host_id", "host_name",
    "neighbourhood_group", "neighbourhood", "room_type"
  )
  fp[categorical] <- lapply(fp[categorical], factor)
  fp
})

# Keep a copy of the inactive listings (price == 0 or availability_365 == 0),
# then retain only rows where both values are strictly positive.
inactivated <- subset(fp, price == 0 | availability_365 == 0)
fp <- fp[which(fp$price > 0 & fp$availability_365 > 0), ]
# fp <- subset(fp, fp$price < 1500) # get rid of expensive data

# "Listing history": number_of_reviews divided by reviews_per_month.
# Must run before the NA fill below, so that listings with a missing
# reviews_per_month get NA here instead of a division by zero.
fp$list_history <- with(fp, number_of_reviews / reviews_per_month)

# Missing reviews_per_month becomes 0.
fp$reviews_per_month <- replace(
  fp$reviews_per_month,
  is.na(fp$reviews_per_month),
  0
)

# Minimum spend: nightly price times the minimum number of nights.
fp$min_spend <- with(fp, price * minimum_nights)

# Drop the identifier columns; they are not used in the analysis.
delete <- c("id", "host_id")
fp <- fp[setdiff(names(fp), delete)]
# One data frame per borough, selected on neighbourhood_group.
manhattan <- fp[which(fp$neighbourhood_group == "Manhattan"), ]
brooklyn <- fp[which(fp$neighbourhood_group == "Brooklyn"), ]
bronx <- fp[which(fp$neighbourhood_group == "Bronx"), ]
queens <- fp[which(fp$neighbourhood_group == "Queens"), ]
staten <- fp[which(fp$neighbourhood_group == "Staten Island"), ]
# Preview the first rows of the cleaned listings.
fp %>% head()
## # A tibble: 6 x 16
## name host_name neighbourhood_g… neighbourhood latitude longitude
## <fct> <fct> <fct> <fct> <dbl> <dbl>
## 1 Clea… John Brooklyn Kensington 40.6 -74.0
## 2 Skyl… Jennifer Manhattan Midtown 40.8 -74.0
## 3 THE … Elisabeth Manhattan Harlem 40.8 -73.9
## 4 Cozy… LisaRoxa… Brooklyn Clinton Hill 40.7 -74.0
## 5 Larg… Chris Manhattan Murray Hill 40.7 -74.0
## 6 Larg… Shunichi Manhattan Hell's Kitch… 40.8 -74.0
## # … with 10 more variables: room_type <fct>, price <dbl>,
## # minimum_nights <dbl>, number_of_reviews <dbl>, last_review <date>,
## # reviews_per_month <dbl>, calculated_host_listings_count <dbl>,
## # availability_365 <dbl>, list_history <dbl>, min_spend <dbl>
# Structure overview: number of observations, variables and column types.
fp %>% str()
## Classes 'tbl_df', 'tbl' and 'data.frame': 31354 obs. of 16 variables:
## $ name : Factor w/ 47894 levels "_Special Offers: Guest Assistance",..: 12563 38007 45006 15581 24841 24888 17562 5566 10757 26708 ...
## $ host_name : Factor w/ 11452 levels " Valéria","-TheQueensCornerLot",..: 4996 4790 2912 6209 1937 9648 1234 6029 5408 5958 ...
## $ neighbourhood_group : Factor w/ 5 levels "Bronx","Brooklyn",..: 2 3 3 2 3 3 3 3 3 2 ...
## $ neighbourhood : Factor w/ 221 levels "Allerton","Arden Heights",..: 109 128 95 42 138 96 36 203 96 183 ...
## $ latitude : num 40.6 40.8 40.8 40.7 40.7 ...
## $ longitude : num -74 -74 -73.9 -74 -74 ...
## $ room_type : Factor w/ 3 levels "Entire home/apt",..: 2 1 2 1 1 2 1 1 2 2 ...
## $ price : num 149 225 150 89 200 79 150 135 85 89 ...
## $ minimum_nights : num 1 1 3 1 3 2 1 5 2 4 ...
## $ number_of_reviews : num 9 45 0 270 74 430 160 53 188 167 ...
## $ last_review : Date, format: "2018-10-19" "2019-05-21" ...
## $ reviews_per_month : num 0.21 0.38 0 4.64 0.59 3.47 1.33 0.43 1.5 1.34 ...
## $ calculated_host_listings_count: num 6 2 1 1 1 1 4 1 1 3 ...
## $ availability_365 : num 365 355 365 194 129 220 188 6 39 314 ...
## $ list_history : num 42.9 118.4 NA 58.2 125.4 ...
## $ min_spend : num 149 225 450 89 600 158 150 675 170 356 ...
# Per-column summary statistics (level counts for factors, quantiles and
# NA counts for numeric and date columns).
fp %>% summary()
## name host_name
## Hillside Hotel : 18 Sonder (NYC): 327
## Home away from home : 13 Michael : 242
## New york Multi-unit building : 12 Blueground : 232
## Loft Suite @ The Box House Hotel : 11 David : 228
## Artsy Private BR in Fort Greene Cumberland: 10 John : 211
## (Other) :31286 (Other) :30106
## NA's : 4 NA's : 8
## neighbourhood_group neighbourhood latitude
## Bronx : 913 Bedford-Stuyvesant: 2478 Min. :40.50
## Brooklyn :12253 Williamsburg : 2051 1st Qu.:40.69
## Manhattan :13559 Harlem : 1734 Median :40.72
## Queens : 4298 Bushwick : 1447 Mean :40.73
## Staten Island: 331 Hell's Kitchen : 1446 3rd Qu.:40.76
## Upper East Side : 1095 Max. :40.91
## (Other) :21103
## longitude room_type price
## Min. :-74.24 Entire home/apt:16532 Min. : 10.0
## 1st Qu.:-73.98 Private room :13960 1st Qu.: 70.0
## Median :-73.95 Shared room : 862 Median : 112.0
## Mean :-73.95 Mean : 162.1
## 3rd Qu.:-73.93 3rd Qu.: 189.0
## Max. :-73.71 Max. :10000.0
##
## minimum_nights number_of_reviews last_review
## Min. : 1.000 Min. : 0.00 Min. :2011-04-25
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.:2019-04-28
## Median : 3.000 Median : 10.00 Median :2019-06-15
## Mean : 8.338 Mean : 31.85 Mean :2019-03-19
## 3rd Qu.: 5.000 3rd Qu.: 39.00 3rd Qu.:2019-06-28
## Max. :1250.000 Max. :629.00 Max. :2019-07-08
## NA's :5207
## reviews_per_month calculated_host_listings_count availability_365
## Min. : 0.000 Min. : 1.00 Min. : 1.0
## 1st Qu.: 0.170 1st Qu.: 1.00 1st Qu.: 55.0
## Median : 0.920 Median : 1.00 Median :168.0
## Mean : 1.509 Mean : 10.34 Mean :175.8
## 3rd Qu.: 2.340 3rd Qu.: 3.00 3rd Qu.:305.0
## Max. :58.500 Max. :327.00 Max. :365.0
##
## list_history min_spend
## Min. : 1.000 Min. : 10
## 1st Qu.: 6.828 1st Qu.: 140
## Median : 18.859 Median : 330
## Mean : 25.783 Mean : 1591
## 3rd Qu.: 38.636 3rd Qu.: 900
## Max. :125.424 Max. :1170000
## NA's :5207
All New York City
# Listing counts across all of NYC: one bar chart per categorical variable,
# stacked in a single column.
grid.arrange(
  grobs = list(
    # listings per borough
    ggplot(fp, aes(neighbourhood_group)) + geom_bar(),
    # listings per neighbourhood
    ggplot(fp, aes(neighbourhood)) + geom_bar(),
    # listings per room type
    ggplot(fp, aes(room_type)) + geom_bar()
  ),
  ncol = 1
)
Manhattan
# Manhattan: listings per neighbourhood (horizontal bars) next to listings
# per room type.
grid.arrange(
  grobs = list(
    ggplot(manhattan, aes(neighbourhood)) + geom_bar() + coord_flip(),
    ggplot(manhattan, aes(room_type)) + geom_bar()
  ),
  ncol = 2
)
Brooklyn
# Brooklyn: listings per neighbourhood (horizontal bars) next to listings
# per room type.
grid.arrange(
  grobs = list(
    ggplot(brooklyn, aes(neighbourhood)) + geom_bar() + coord_flip(),
    ggplot(brooklyn, aes(room_type)) + geom_bar()
  ),
  ncol = 2
)
Bronx
# Bronx: listings per neighbourhood (horizontal bars) next to listings
# per room type.
grid.arrange(
  grobs = list(
    ggplot(bronx, aes(neighbourhood)) + geom_bar() + coord_flip(),
    ggplot(bronx, aes(room_type)) + geom_bar()
  ),
  ncol = 2
)
Staten Island
# Staten Island: listings per neighbourhood (horizontal bars) next to
# listings per room type.
grid.arrange(
  grobs = list(
    ggplot(staten, aes(neighbourhood)) + geom_bar() + coord_flip(),
    ggplot(staten, aes(room_type)) + geom_bar()
  ),
  ncol = 2
)
Frequency of categorical variables
# Summarise the categorical columns of fp with inspectdf and plot the result.
show_plot(inspect_cat(fp))
All NYC
# price: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(price)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = price)) + geom_boxplot() + coord_flip()
  ),
  ncol = 1
)
# minimum_nights: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(minimum_nights)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = minimum_nights)) + geom_boxplot() + coord_flip()
  ),
  # single column: the two panels are stacked vertically
  ncol = 1
)
# number_of_reviews: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(number_of_reviews)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = number_of_reviews)) +
      geom_boxplot() +
      coord_flip()
  ),
  ncol = 1
)
# reviews_per_month: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(reviews_per_month)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = reviews_per_month)) +
      geom_boxplot() +
      coord_flip()
  ),
  # single column: the two panels are stacked vertically
  ncol = 1
)
# calculated_host_listings_count: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(calculated_host_listings_count)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = calculated_host_listings_count)) +
      geom_boxplot() +
      coord_flip()
  ),
  # single column: the two panels are stacked vertically
  ncol = 1
)
# availability_365: histogram on top, boxplot underneath.
grid.arrange(
  grobs = list(
    ggplot(fp, aes(availability_365)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = availability_365)) +
      geom_boxplot() +
      coord_flip()
  ),
  # single column: the two panels are stacked vertically
  ncol = 1
)
# list_history: histogram on top, boxplot underneath.
# list_history contains NA values (see the summary output above).
grid.arrange(
  grobs = list(
    ggplot(fp, aes(list_history)) + geom_histogram(),
    # flipped so the value axis is horizontal, like the histogram's
    ggplot(fp, aes(x = 1, y = list_history)) + geom_boxplot() + coord_flip()
  ),
  # single column: the two panels are stacked vertically
  ncol = 1
)
Show all numerical variables
# Summarise the numeric columns of fp with inspectdf and plot their
# distributions.
show_plot(inspect_num(fp))
neighbourhood_group & room_type
# Listing counts by borough (rows) and room type (columns), with totals
# added on both margins.
tabyl(fp, neighbourhood_group, room_type) %>%
  adorn_totals(c("row", "col"))
## neighbourhood_group Entire home/apt Private room Shared room Total
## Bronx 318 545 50 913
## Brooklyn 6071 5876 306 12253
## Manhattan 8441 4782 336 13559
## Queens 1550 2586 162 4298
## Staten Island 152 171 8 331
## Total 16532 13960 862 31354
# Same borough x room-type table expressed as proportions of all listings,
# rounded to two decimals.
tabyl(fp, neighbourhood_group, room_type) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("all") %>%
  adorn_rounding(digits = 2)
## neighbourhood_group Entire home/apt Private room Shared room Total
## Bronx 0.01 0.02 0.00 0.03
## Brooklyn 0.19 0.19 0.01 0.39
## Manhattan 0.27 0.15 0.01 0.43
## Queens 0.05 0.08 0.01 0.14
## Staten Island 0.00 0.01 0.00 0.01
## Total 0.53 0.45 0.03 1.00
neighbourhood & room_type (all NYC)
# Listing counts by neighbourhood (rows) and room type (columns) over the
# full fp data set, with totals added on both margins.
tabyl(fp, neighbourhood, room_type) %>%
  adorn_totals(c("row", "col"))
## neighbourhood Entire home/apt Private room Shared room Total
## Allerton 15 22 0 37
## Arden Heights 3 0 0 3
## Arrochar 10 11 0 21
## Arverne 46 25 2 73
## Astoria 213 317 18 548
## Bath Beach 5 10 0 15
## Battery Park City 23 10 1 34
## Bay Ridge 54 51 1 106
## Bay Terrace 3 2 1 6
## Bay Terrace, Staten Island 0 0 0 0
## Baychester 3 3 0 6
## Bayside 12 24 0 36
## Bayswater 5 12 0 17
## Bedford-Stuyvesant 1161 1248 69 2478
## Belle Harbor 5 3 0 8
## Bellerose 2 9 1 12
## Belmont 3 13 1 17
## Bensonhurst 23 31 1 55
## Bergen Beach 8 2 0 10
## Boerum Hill 81 18 0 99
## Borough Park 22 74 16 112
## Breezy Point 0 3 0 3
## Briarwood 18 27 3 48
## Brighton Beach 26 32 6 64
## Bronxdale 4 8 1 13
## Brooklyn Heights 69 14 0 83
## Brownsville 17 31 0 48
## Bull's Head 0 4 0 4
## Bushwick 445 952 50 1447
## Cambria Heights 4 16 0 20
## Canarsie 73 66 1 140
## Carroll Gardens 96 26 0 122
## Castle Hill 4 5 0 9
## Castleton Corners 2 1 0 3
## Chelsea 520 176 10 706
## Chinatown 134 90 2 226
## City Island 10 7 0 17
## Civic Center 18 9 0 27
## Claremont Village 8 20 0 28
## Clason Point 7 11 2 20
## Clifton 7 3 4 14
## Clinton Hill 200 117 5 322
## Co-op City 0 2 0 2
## Cobble Hill 48 10 0 58
## College Point 9 6 0 15
## Columbia St 21 5 0 26
## Concord 4 19 3 26
## Concourse 14 25 0 39
## Concourse Village 6 18 0 24
## Coney Island 9 3 2 14
## Corona 12 26 24 62
## Crown Heights 483 413 16 912
## Cypress Hills 49 62 6 117
## Ditmars Steinway 80 130 0 210
## Dongan Hills 3 3 0 6
## Douglaston 3 5 0 8
## Downtown Brooklyn 25 13 0 38
## DUMBO 12 8 0 20
## Dyker Heights 5 4 1 10
## East Elmhurst 45 113 5 163
## East Flatbush 157 254 22 433
## East Harlem 318 384 34 736
## East Morrisania 3 6 0 9
## East New York 89 117 5 211
## East Village 630 319 8 957
## Eastchester 6 6 1 13
## Edenwald 6 7 0 13
## Edgemere 4 6 0 10
## Elmhurst 44 128 7 179
## Eltingville 2 1 0 3
## Emerson Hill 2 2 0 4
## Far Rockaway 9 16 0 25
## Fieldston 3 3 1 7
## Financial District 479 88 5 572
## Flatbush 154 196 17 367
## Flatiron District 41 8 0 49
## Flatlands 31 42 1 74
## Flushing 92 269 8 369
## Fordham 12 40 4 56
## Forest Hills 39 54 6 99
## Fort Greene 172 109 6 287
## Fort Hamilton 22 18 2 42
## Fort Wadsworth 1 0 0 1
## Fresh Meadows 13 14 0 27
## Glendale 21 21 0 42
## Gowanus 82 69 1 152
## Gramercy 149 45 2 196
## Graniteville 1 1 0 2
## Grant City 4 2 0 6
## Gravesend 23 15 8 46
## Great Kills 5 5 0 10
## Greenpoint 346 225 14 585
## Greenwich Village 163 43 1 207
## Grymes Hill 5 1 0 6
## Harlem 674 1021 39 1734
## Hell's Kitchen 886 472 88 1446
## Highbridge 5 19 0 24
## Hollis 3 9 1 13
## Holliswood 0 3 0 3
## Howard Beach 10 6 0 16
## Howland Hook 2 0 0 2
## Huguenot 3 0 0 3
## Hunts Point 0 16 0 16
## Inwood 55 99 2 156
## Jackson Heights 46 93 11 150
## Jamaica 74 131 6 211
## Jamaica Estates 15 3 0 18
## Jamaica Hills 3 3 0 6
## Kensington 38 61 3 102
## Kew Gardens 4 19 3 26
## Kew Gardens Hills 12 7 0 19
## Kingsbridge 15 41 2 58
## Kips Bay 213 52 13 278
## Laurelton 9 7 0 16
## Lighthouse Hill 2 0 0 2
## Little Italy 53 25 2 80
## Little Neck 1 3 1 5
## Long Island City 146 201 10 357
## Longwood 14 29 1 44
## Lower East Side 313 186 31 530
## Manhattan Beach 4 2 0 6
## Marble Hill 2 4 0 6
## Mariners Harbor 3 5 0 8
## Maspeth 41 54 3 98
## Melrose 5 2 0 7
## Middle Village 20 9 0 29
## Midland Beach 4 1 0 5
## Midtown 802 272 13 1087
## Midwood 30 34 5 69
## Mill Basin 4 0 0 4
## Morningside Heights 43 84 1 128
## Morris Heights 2 12 0 14
## Morris Park 3 9 1 13
## Morrisania 3 8 3 14
## Mott Haven 23 29 0 52
## Mount Eden 0 3 0 3
## Mount Hope 9 7 0 16
## Murray Hill 313 46 7 366
## Navy Yard 3 3 0 6
## Neponsit 3 0 0 3
## New Brighton 1 4 0 5
## New Dorp 0 0 0 0
## New Dorp Beach 1 3 0 4
## New Springville 2 4 0 6
## NoHo 49 4 0 53
## Nolita 88 41 2 131
## North Riverdale 3 5 0 8
## Norwood 3 18 1 22
## Oakwood 2 2 0 4
## Olinville 0 1 2 3
## Ozone Park 30 20 1 51
## Park Slope 216 73 0 289
## Parkchester 7 19 5 31
## Pelham Bay 11 3 2 16
## Pelham Gardens 17 8 1 26
## Port Morris 14 12 15 41
## Port Richmond 0 5 0 5
## Prince's Bay 2 1 0 3
## Prospect Heights 116 73 0 189
## Prospect-Lefferts Gardens 160 156 9 325
## Queens Village 32 18 1 51
## Randall Manor 9 9 1 19
## Red Hook 34 18 1 53
## Rego Park 35 46 4 85
## Richmond Hill 32 48 3 83
## Richmondtown 1 0 0 1
## Ridgewood 85 182 10 277
## Riverdale 4 3 1 8
## Rockaway Beach 37 12 0 49
## Roosevelt Island 9 25 1 35
## Rosebank 4 2 0 6
## Rosedale 16 38 0 54
## Rossville 1 0 0 1
## Schuylerville 2 9 1 12
## Sea Gate 3 0 0 3
## Sheepshead Bay 45 70 17 132
## Shore Acres 3 2 0 5
## Silver Lake 1 0 0 1
## SoHo 166 77 1 244
## Soundview 3 9 0 12
## South Beach 1 6 0 7
## South Ozone Park 13 17 8 38
## South Slope 112 56 0 168
## Springfield Gardens 31 47 1 79
## Spuyten Duyvil 3 1 0 4
## St. Albans 27 45 0 72
## St. George 25 17 0 42
## Stapleton 9 16 0 25
## Stuyvesant Town 7 10 1 18
## Sunnyside 76 148 21 245
## Sunset Park 118 128 3 249
## Theater District 134 103 2 239
## Throgs Neck 11 12 0 23
## Todt Hill 2 1 0 3
## Tompkinsville 13 24 0 37
## Tottenville 5 2 0 7
## Tremont 2 3 2 7
## Tribeca 108 13 0 121
## Two Bridges 14 33 0 47
## Unionport 4 3 0 7
## University Heights 4 11 0 15
## Upper East Side 795 270 30 1095
## Upper West Side 712 354 23 1089
## Van Nest 5 3 3 11
## Vinegar Hill 13 7 1 21
## Wakefield 16 26 0 42
## Washington Heights 175 357 16 548
## West Brighton 5 13 0 18
## West Farms 1 1 0 2
## West Village 355 62 1 418
## Westchester Square 4 5 0 9
## Westerleigh 1 1 0 2
## Whitestone 3 7 0 10
## Williamsbridge 17 17 0 34
## Williamsburg 1098 936 17 2051
## Willowbrook 1 0 0 1
## Windsor Terrace 69 24 0 93
## Woodhaven 16 61 2 79
## Woodlawn 4 5 0 9
## Woodrow 0 0 0 0
## Woodside 51 123 1 175
## Total 16532 13960 862 31354
# Neighbourhood x room-type table expressed as proportions of all listings,
# rounded to two decimals.
tabyl(fp, neighbourhood, room_type) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("all") %>%
  adorn_rounding(digits = 2)
## neighbourhood Entire home/apt Private room Shared room Total
## Allerton 0.00 0.00 0.00 0.00
## Arden Heights 0.00 0.00 0.00 0.00
## Arrochar 0.00 0.00 0.00 0.00
## Arverne 0.00 0.00 0.00 0.00
## Astoria 0.01 0.01 0.00 0.02
## Bath Beach 0.00 0.00 0.00 0.00
## Battery Park City 0.00 0.00 0.00 0.00
## Bay Ridge 0.00 0.00 0.00 0.00
## Bay Terrace 0.00 0.00 0.00 0.00
## Bay Terrace, Staten Island 0.00 0.00 0.00 0.00
## Baychester 0.00 0.00 0.00 0.00
## Bayside 0.00 0.00 0.00 0.00
## Bayswater 0.00 0.00 0.00 0.00
## Bedford-Stuyvesant 0.04 0.04 0.00 0.08
## Belle Harbor 0.00 0.00 0.00 0.00
## Bellerose 0.00 0.00 0.00 0.00
## Belmont 0.00 0.00 0.00 0.00
## Bensonhurst 0.00 0.00 0.00 0.00
## Bergen Beach 0.00 0.00 0.00 0.00
## Boerum Hill 0.00 0.00 0.00 0.00
## Borough Park 0.00 0.00 0.00 0.00
## Breezy Point 0.00 0.00 0.00 0.00
## Briarwood 0.00 0.00 0.00 0.00
## Brighton Beach 0.00 0.00 0.00 0.00
## Bronxdale 0.00 0.00 0.00 0.00
## Brooklyn Heights 0.00 0.00 0.00 0.00
## Brownsville 0.00 0.00 0.00 0.00
## Bull's Head 0.00 0.00 0.00 0.00
## Bushwick 0.01 0.03 0.00 0.05
## Cambria Heights 0.00 0.00 0.00 0.00
## Canarsie 0.00 0.00 0.00 0.00
## Carroll Gardens 0.00 0.00 0.00 0.00
## Castle Hill 0.00 0.00 0.00 0.00
## Castleton Corners 0.00 0.00 0.00 0.00
## Chelsea 0.02 0.01 0.00 0.02
## Chinatown 0.00 0.00 0.00 0.01
## City Island 0.00 0.00 0.00 0.00
## Civic Center 0.00 0.00 0.00 0.00
## Claremont Village 0.00 0.00 0.00 0.00
## Clason Point 0.00 0.00 0.00 0.00
## Clifton 0.00 0.00 0.00 0.00
## Clinton Hill 0.01 0.00 0.00 0.01
## Co-op City 0.00 0.00 0.00 0.00
## Cobble Hill 0.00 0.00 0.00 0.00
## College Point 0.00 0.00 0.00 0.00
## Columbia St 0.00 0.00 0.00 0.00
## Concord 0.00 0.00 0.00 0.00
## Concourse 0.00 0.00 0.00 0.00
## Concourse Village 0.00 0.00 0.00 0.00
## Coney Island 0.00 0.00 0.00 0.00
## Corona 0.00 0.00 0.00 0.00
## Crown Heights 0.02 0.01 0.00 0.03
## Cypress Hills 0.00 0.00 0.00 0.00
## Ditmars Steinway 0.00 0.00 0.00 0.01
## Dongan Hills 0.00 0.00 0.00 0.00
## Douglaston 0.00 0.00 0.00 0.00
## Downtown Brooklyn 0.00 0.00 0.00 0.00
## DUMBO 0.00 0.00 0.00 0.00
## Dyker Heights 0.00 0.00 0.00 0.00
## East Elmhurst 0.00 0.00 0.00 0.01
## East Flatbush 0.01 0.01 0.00 0.01
## East Harlem 0.01 0.01 0.00 0.02
## East Morrisania 0.00 0.00 0.00 0.00
## East New York 0.00 0.00 0.00 0.01
## East Village 0.02 0.01 0.00 0.03
## Eastchester 0.00 0.00 0.00 0.00
## Edenwald 0.00 0.00 0.00 0.00
## Edgemere 0.00 0.00 0.00 0.00
## Elmhurst 0.00 0.00 0.00 0.01
## Eltingville 0.00 0.00 0.00 0.00
## Emerson Hill 0.00 0.00 0.00 0.00
## Far Rockaway 0.00 0.00 0.00 0.00
## Fieldston 0.00 0.00 0.00 0.00
## Financial District 0.02 0.00 0.00 0.02
## Flatbush 0.00 0.01 0.00 0.01
## Flatiron District 0.00 0.00 0.00 0.00
## Flatlands 0.00 0.00 0.00 0.00
## Flushing 0.00 0.01 0.00 0.01
## Fordham 0.00 0.00 0.00 0.00
## Forest Hills 0.00 0.00 0.00 0.00
## Fort Greene 0.01 0.00 0.00 0.01
## Fort Hamilton 0.00 0.00 0.00 0.00
## Fort Wadsworth 0.00 0.00 0.00 0.00
## Fresh Meadows 0.00 0.00 0.00 0.00
## Glendale 0.00 0.00 0.00 0.00
## Gowanus 0.00 0.00 0.00 0.00
## Gramercy 0.00 0.00 0.00 0.01
## Graniteville 0.00 0.00 0.00 0.00
## Grant City 0.00 0.00 0.00 0.00
## Gravesend 0.00 0.00 0.00 0.00
## Great Kills 0.00 0.00 0.00 0.00
## Greenpoint 0.01 0.01 0.00 0.02
## Greenwich Village 0.01 0.00 0.00 0.01
## Grymes Hill 0.00 0.00 0.00 0.00
## Harlem 0.02 0.03 0.00 0.06
## Hell's Kitchen 0.03 0.02 0.00 0.05
## Highbridge 0.00 0.00 0.00 0.00
## Hollis 0.00 0.00 0.00 0.00
## Holliswood 0.00 0.00 0.00 0.00
## Howard Beach 0.00 0.00 0.00 0.00
## Howland Hook 0.00 0.00 0.00 0.00
## Huguenot 0.00 0.00 0.00 0.00
## Hunts Point 0.00 0.00 0.00 0.00
## Inwood 0.00 0.00 0.00 0.00
## Jackson Heights 0.00 0.00 0.00 0.00
## Jamaica 0.00 0.00 0.00 0.01
## Jamaica Estates 0.00 0.00 0.00 0.00
## Jamaica Hills 0.00 0.00 0.00 0.00
## Kensington 0.00 0.00 0.00 0.00
## Kew Gardens 0.00 0.00 0.00 0.00
## Kew Gardens Hills 0.00 0.00 0.00 0.00
## Kingsbridge 0.00 0.00 0.00 0.00
## Kips Bay 0.01 0.00 0.00 0.01
## Laurelton 0.00 0.00 0.00 0.00
## Lighthouse Hill 0.00 0.00 0.00 0.00
## Little Italy 0.00 0.00 0.00 0.00
## Little Neck 0.00 0.00 0.00 0.00
## Long Island City 0.00 0.01 0.00 0.01
## Longwood 0.00 0.00 0.00 0.00
## Lower East Side 0.01 0.01 0.00 0.02
## Manhattan Beach 0.00 0.00 0.00 0.00
## Marble Hill 0.00 0.00 0.00 0.00
## Mariners Harbor 0.00 0.00 0.00 0.00
## Maspeth 0.00 0.00 0.00 0.00
## Melrose 0.00 0.00 0.00 0.00
## Middle Village 0.00 0.00 0.00 0.00
## Midland Beach 0.00 0.00 0.00 0.00
## Midtown 0.03 0.01 0.00 0.03
## Midwood 0.00 0.00 0.00 0.00
## Mill Basin 0.00 0.00 0.00 0.00
## Morningside Heights 0.00 0.00 0.00 0.00
## Morris Heights 0.00 0.00 0.00 0.00
## Morris Park 0.00 0.00 0.00 0.00
## Morrisania 0.00 0.00 0.00 0.00
## Mott Haven 0.00 0.00 0.00 0.00
## Mount Eden 0.00 0.00 0.00 0.00
## Mount Hope 0.00 0.00 0.00 0.00
## Murray Hill 0.01 0.00 0.00 0.01
## Navy Yard 0.00 0.00 0.00 0.00
## Neponsit 0.00 0.00 0.00 0.00
## New Brighton 0.00 0.00 0.00 0.00
## New Dorp 0.00 0.00 0.00 0.00
## New Dorp Beach 0.00 0.00 0.00 0.00
## New Springville 0.00 0.00 0.00 0.00
## NoHo 0.00 0.00 0.00 0.00
## Nolita 0.00 0.00 0.00 0.00
## North Riverdale 0.00 0.00 0.00 0.00
## Norwood 0.00 0.00 0.00 0.00
## Oakwood 0.00 0.00 0.00 0.00
## Olinville 0.00 0.00 0.00 0.00
## Ozone Park 0.00 0.00 0.00 0.00
## Park Slope 0.01 0.00 0.00 0.01
## Parkchester 0.00 0.00 0.00 0.00
## Pelham Bay 0.00 0.00 0.00 0.00
## Pelham Gardens 0.00 0.00 0.00 0.00
## Port Morris 0.00 0.00 0.00 0.00
## Port Richmond 0.00 0.00 0.00 0.00
## Prince's Bay 0.00 0.00 0.00 0.00
## Prospect Heights 0.00 0.00 0.00 0.01
## Prospect-Lefferts Gardens 0.01 0.00 0.00 0.01
## Queens Village 0.00 0.00 0.00 0.00
## Randall Manor 0.00 0.00 0.00 0.00
## Red Hook 0.00 0.00 0.00 0.00
## Rego Park 0.00 0.00 0.00 0.00
## Richmond Hill 0.00 0.00 0.00 0.00
## Richmondtown 0.00 0.00 0.00 0.00
## Ridgewood 0.00 0.01 0.00 0.01
## Riverdale 0.00 0.00 0.00 0.00
## Rockaway Beach 0.00 0.00 0.00 0.00
## Roosevelt Island 0.00 0.00 0.00 0.00
## Rosebank 0.00 0.00 0.00 0.00
## Rosedale 0.00 0.00 0.00 0.00
## Rossville 0.00 0.00 0.00 0.00
## Schuylerville 0.00 0.00 0.00 0.00
## Sea Gate 0.00 0.00 0.00 0.00
## Sheepshead Bay 0.00 0.00 0.00 0.00
## Shore Acres 0.00 0.00 0.00 0.00
## Silver Lake 0.00 0.00 0.00 0.00
## SoHo 0.01 0.00 0.00 0.01
## Soundview 0.00 0.00 0.00 0.00
## South Beach 0.00 0.00 0.00 0.00
## South Ozone Park 0.00 0.00 0.00 0.00
## South Slope 0.00 0.00 0.00 0.01
## Springfield Gardens 0.00 0.00 0.00 0.00
## Spuyten Duyvil 0.00 0.00 0.00 0.00
## St. Albans 0.00 0.00 0.00 0.00
## St. George 0.00 0.00 0.00 0.00
## Stapleton 0.00 0.00 0.00 0.00
## Stuyvesant Town 0.00 0.00 0.00 0.00
## Sunnyside 0.00 0.00 0.00 0.01
## Sunset Park 0.00 0.00 0.00 0.01
## Theater District 0.00 0.00 0.00 0.01
## Throgs Neck 0.00 0.00 0.00 0.00
## Todt Hill 0.00 0.00 0.00 0.00
## Tompkinsville 0.00 0.00 0.00 0.00
## Tottenville 0.00 0.00 0.00 0.00
## Tremont 0.00 0.00 0.00 0.00
## Tribeca 0.00 0.00 0.00 0.00
## Two Bridges 0.00 0.00 0.00 0.00
## Unionport 0.00 0.00 0.00 0.00
## University Heights 0.00 0.00 0.00 0.00
## Upper East Side 0.03 0.01 0.00 0.03
## Upper West Side 0.02 0.01 0.00 0.03
## Van Nest 0.00 0.00 0.00 0.00
## Vinegar Hill 0.00 0.00 0.00 0.00
## Wakefield 0.00 0.00 0.00 0.00
## Washington Heights 0.01 0.01 0.00 0.02
## West Brighton 0.00 0.00 0.00 0.00
## West Farms 0.00 0.00 0.00 0.00
## West Village 0.01 0.00 0.00 0.01
## Westchester Square 0.00 0.00 0.00 0.00
## Westerleigh 0.00 0.00 0.00 0.00
## Whitestone 0.00 0.00 0.00 0.00
## Williamsbridge 0.00 0.00 0.00 0.00
## Williamsburg 0.04 0.03 0.00 0.07
## Willowbrook 0.00 0.00 0.00 0.00
## Windsor Terrace 0.00 0.00 0.00 0.00
## Woodhaven 0.00 0.00 0.00 0.00
## Woodlawn 0.00 0.00 0.00 0.00
## Woodrow 0.00 0.00 0.00 0.00
## Woodside 0.00 0.00 0.00 0.01
## Total 0.53 0.45 0.03 1.00
neighbourhood & neighbourhood_group
# Listing counts by neighbourhood (rows) and borough (columns), with totals
# added on both margins.
tabyl(fp, neighbourhood, neighbourhood_group) %>%
  adorn_totals(c("row", "col"))
## neighbourhood Bronx Brooklyn Manhattan Queens Staten Island
## Allerton 37 0 0 0 0
## Arden Heights 0 0 0 0 3
## Arrochar 0 0 0 0 21
## Arverne 0 0 0 73 0
## Astoria 0 0 0 548 0
## Bath Beach 0 15 0 0 0
## Battery Park City 0 0 34 0 0
## Bay Ridge 0 106 0 0 0
## Bay Terrace 0 0 0 6 0
## Bay Terrace, Staten Island 0 0 0 0 0
## Baychester 6 0 0 0 0
## Bayside 0 0 0 36 0
## Bayswater 0 0 0 17 0
## Bedford-Stuyvesant 0 2478 0 0 0
## Belle Harbor 0 0 0 8 0
## Bellerose 0 0 0 12 0
## Belmont 17 0 0 0 0
## Bensonhurst 0 55 0 0 0
## Bergen Beach 0 10 0 0 0
## Boerum Hill 0 99 0 0 0
## Borough Park 0 112 0 0 0
## Breezy Point 0 0 0 3 0
## Briarwood 0 0 0 48 0
## Brighton Beach 0 64 0 0 0
## Bronxdale 13 0 0 0 0
## Brooklyn Heights 0 83 0 0 0
## Brownsville 0 48 0 0 0
## Bull's Head 0 0 0 0 4
## Bushwick 0 1447 0 0 0
## Cambria Heights 0 0 0 20 0
## Canarsie 0 140 0 0 0
## Carroll Gardens 0 122 0 0 0
## Castle Hill 9 0 0 0 0
## Castleton Corners 0 0 0 0 3
## Chelsea 0 0 706 0 0
## Chinatown 0 0 226 0 0
## City Island 17 0 0 0 0
## Civic Center 0 0 27 0 0
## Claremont Village 28 0 0 0 0
## Clason Point 20 0 0 0 0
## Clifton 0 0 0 0 14
## Clinton Hill 0 322 0 0 0
## Co-op City 2 0 0 0 0
## Cobble Hill 0 58 0 0 0
## College Point 0 0 0 15 0
## Columbia St 0 26 0 0 0
## Concord 0 0 0 0 26
## Concourse 39 0 0 0 0
## Concourse Village 24 0 0 0 0
## Coney Island 0 14 0 0 0
## Corona 0 0 0 62 0
## Crown Heights 0 912 0 0 0
## Cypress Hills 0 117 0 0 0
## Ditmars Steinway 0 0 0 210 0
## Dongan Hills 0 0 0 0 6
## Douglaston 0 0 0 8 0
## Downtown Brooklyn 0 38 0 0 0
## DUMBO 0 20 0 0 0
## Dyker Heights 0 10 0 0 0
## East Elmhurst 0 0 0 163 0
## East Flatbush 0 433 0 0 0
## East Harlem 0 0 736 0 0
## East Morrisania 9 0 0 0 0
## East New York 0 211 0 0 0
## East Village 0 0 957 0 0
## Eastchester 13 0 0 0 0
## Edenwald 13 0 0 0 0
## Edgemere 0 0 0 10 0
## Elmhurst 0 0 0 179 0
## Eltingville 0 0 0 0 3
## Emerson Hill 0 0 0 0 4
## Far Rockaway 0 0 0 25 0
## Fieldston 7 0 0 0 0
## Financial District 0 0 572 0 0
## Flatbush 0 367 0 0 0
## Flatiron District 0 0 49 0 0
## Flatlands 0 74 0 0 0
## Flushing 0 0 0 369 0
## Fordham 56 0 0 0 0
## Forest Hills 0 0 0 99 0
## Fort Greene 0 287 0 0 0
## Fort Hamilton 0 42 0 0 0
## Fort Wadsworth 0 0 0 0 1
## Fresh Meadows 0 0 0 27 0
## Glendale 0 0 0 42 0
## Gowanus 0 152 0 0 0
## Gramercy 0 0 196 0 0
## Graniteville 0 0 0 0 2
## Grant City 0 0 0 0 6
## Gravesend 0 46 0 0 0
## Great Kills 0 0 0 0 10
## Greenpoint 0 585 0 0 0
## Greenwich Village 0 0 207 0 0
## Grymes Hill 0 0 0 0 6
## Harlem 0 0 1734 0 0
## Hell's Kitchen 0 0 1446 0 0
## Highbridge 24 0 0 0 0
## Hollis 0 0 0 13 0
## Holliswood 0 0 0 3 0
## Howard Beach 0 0 0 16 0
## Howland Hook 0 0 0 0 2
## Huguenot 0 0 0 0 3
## Hunts Point 16 0 0 0 0
## Inwood 0 0 156 0 0
## Jackson Heights 0 0 0 150 0
## Jamaica 0 0 0 211 0
## Jamaica Estates 0 0 0 18 0
## Jamaica Hills 0 0 0 6 0
## Kensington 0 102 0 0 0
## Kew Gardens 0 0 0 26 0
## Kew Gardens Hills 0 0 0 19 0
## Kingsbridge 58 0 0 0 0
## Kips Bay 0 0 278 0 0
## Laurelton 0 0 0 16 0
## Lighthouse Hill 0 0 0 0 2
## Little Italy 0 0 80 0 0
## Little Neck 0 0 0 5 0
## Long Island City 0 0 0 357 0
## Longwood 44 0 0 0 0
## Lower East Side 0 0 530 0 0
## Manhattan Beach 0 6 0 0 0
## Marble Hill 0 0 6 0 0
## Mariners Harbor 0 0 0 0 8
## Maspeth 0 0 0 98 0
## Melrose 7 0 0 0 0
## Middle Village 0 0 0 29 0
## Midland Beach 0 0 0 0 5
## Midtown 0 0 1087 0 0
## Midwood 0 69 0 0 0
## Mill Basin 0 4 0 0 0
## Morningside Heights 0 0 128 0 0
## Morris Heights 14 0 0 0 0
## Morris Park 13 0 0 0 0
## Morrisania 14 0 0 0 0
## Mott Haven 52 0 0 0 0
## Mount Eden 3 0 0 0 0
## Mount Hope 16 0 0 0 0
## Murray Hill 0 0 366 0 0
## Navy Yard 0 6 0 0 0
## Neponsit 0 0 0 3 0
## New Brighton 0 0 0 0 5
## New Dorp 0 0 0 0 0
## New Dorp Beach 0 0 0 0 4
## New Springville 0 0 0 0 6
## NoHo 0 0 53 0 0
## Nolita 0 0 131 0 0
## North Riverdale 8 0 0 0 0
## Norwood 22 0 0 0 0
## Oakwood 0 0 0 0 4
## Olinville 3 0 0 0 0
## Ozone Park 0 0 0 51 0
## Park Slope 0 289 0 0 0
## Parkchester 31 0 0 0 0
## Pelham Bay 16 0 0 0 0
## Pelham Gardens 26 0 0 0 0
## Port Morris 41 0 0 0 0
## Port Richmond 0 0 0 0 5
## Prince's Bay 0 0 0 0 3
## Prospect Heights 0 189 0 0 0
## Prospect-Lefferts Gardens 0 325 0 0 0
## Queens Village 0 0 0 51 0
## Randall Manor 0 0 0 0 19
## Red Hook 0 53 0 0 0
## Rego Park 0 0 0 85 0
## Richmond Hill 0 0 0 83 0
## Richmondtown 0 0 0 0 1
## Ridgewood 0 0 0 277 0
## Riverdale 8 0 0 0 0
## Rockaway Beach 0 0 0 49 0
## Roosevelt Island 0 0 35 0 0
## Rosebank 0 0 0 0 6
## Rosedale 0 0 0 54 0
## Rossville 0 0 0 0 1
## Schuylerville 12 0 0 0 0
## Sea Gate 0 3 0 0 0
## Sheepshead Bay 0 132 0 0 0
## Shore Acres 0 0 0 0 5
## Silver Lake 0 0 0 0 1
## SoHo 0 0 244 0 0
## Soundview 12 0 0 0 0
## South Beach 0 0 0 0 7
## South Ozone Park 0 0 0 38 0
## South Slope 0 168 0 0 0
## Springfield Gardens 0 0 0 79 0
## Spuyten Duyvil 4 0 0 0 0
## St. Albans 0 0 0 72 0
## St. George 0 0 0 0 42
## Stapleton 0 0 0 0 25
## Stuyvesant Town 0 0 18 0 0
## Sunnyside 0 0 0 245 0
## Sunset Park 0 249 0 0 0
## Theater District 0 0 239 0 0
## Throgs Neck 23 0 0 0 0
## Todt Hill 0 0 0 0 3
## Tompkinsville 0 0 0 0 37
## Tottenville 0 0 0 0 7
## Tremont 7 0 0 0 0
## Tribeca 0 0 121 0 0
## Two Bridges 0 0 47 0 0
## Unionport 7 0 0 0 0
## University Heights 15 0 0 0 0
## Upper East Side 0 0 1095 0 0
## Upper West Side 0 0 1089 0 0
## Van Nest 11 0 0 0 0
## Vinegar Hill 0 21 0 0 0
## Wakefield 42 0 0 0 0
## Washington Heights 0 0 548 0 0
## West Brighton 0 0 0 0 18
## West Farms 2 0 0 0 0
## West Village 0 0 418 0 0
## Westchester Square 9 0 0 0 0
## Westerleigh 0 0 0 0 2
## Whitestone 0 0 0 10 0
## Williamsbridge 34 0 0 0 0
## Williamsburg 0 2051 0 0 0
## Willowbrook 0 0 0 0 1
## Windsor Terrace 0 93 0 0 0
## Woodhaven 0 0 0 79 0
## Woodlawn 9 0 0 0 0
## Woodrow 0 0 0 0 0
## Woodside 0 0 0 175 0
## Total 913 12253 13559 4298 331
## Total
## 37
## 3
## 21
## 73
## 548
## 15
## 34
## 106
## 6
## 0
## 6
## 36
## 17
## 2478
## 8
## 12
## 17
## 55
## 10
## 99
## 112
## 3
## 48
## 64
## 13
## 83
## 48
## 4
## 1447
## 20
## 140
## 122
## 9
## 3
## 706
## 226
## 17
## 27
## 28
## 20
## 14
## 322
## 2
## 58
## 15
## 26
## 26
## 39
## 24
## 14
## 62
## 912
## 117
## 210
## 6
## 8
## 38
## 20
## 10
## 163
## 433
## 736
## 9
## 211
## 957
## 13
## 13
## 10
## 179
## 3
## 4
## 25
## 7
## 572
## 367
## 49
## 74
## 369
## 56
## 99
## 287
## 42
## 1
## 27
## 42
## 152
## 196
## 2
## 6
## 46
## 10
## 585
## 207
## 6
## 1734
## 1446
## 24
## 13
## 3
## 16
## 2
## 3
## 16
## 156
## 150
## 211
## 18
## 6
## 102
## 26
## 19
## 58
## 278
## 16
## 2
## 80
## 5
## 357
## 44
## 530
## 6
## 6
## 8
## 98
## 7
## 29
## 5
## 1087
## 69
## 4
## 128
## 14
## 13
## 14
## 52
## 3
## 16
## 366
## 6
## 3
## 5
## 0
## 4
## 6
## 53
## 131
## 8
## 22
## 4
## 3
## 51
## 289
## 31
## 16
## 26
## 41
## 5
## 3
## 189
## 325
## 51
## 19
## 53
## 85
## 83
## 1
## 277
## 8
## 49
## 35
## 6
## 54
## 1
## 12
## 3
## 132
## 5
## 1
## 244
## 12
## 7
## 38
## 168
## 79
## 4
## 72
## 42
## 25
## 18
## 245
## 249
## 239
## 23
## 3
## 37
## 7
## 7
## 121
## 47
## 7
## 15
## 1095
## 1089
## 11
## 21
## 42
## 548
## 18
## 2
## 418
## 9
## 2
## 10
## 34
## 2051
## 1
## 93
## 79
## 9
## 0
## 175
## 31354
# Neighbourhood x borough table expressed as proportions of all listings,
# rounded to two decimals.
tabyl(fp, neighbourhood, neighbourhood_group) %>%
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("all") %>%
  adorn_rounding(digits = 2)
## neighbourhood Bronx Brooklyn Manhattan Queens Staten Island
## Allerton 0.00 0.00 0.00 0.00 0.00
## Arden Heights 0.00 0.00 0.00 0.00 0.00
## Arrochar 0.00 0.00 0.00 0.00 0.00
## Arverne 0.00 0.00 0.00 0.00 0.00
## Astoria 0.00 0.00 0.00 0.02 0.00
## Bath Beach 0.00 0.00 0.00 0.00 0.00
## Battery Park City 0.00 0.00 0.00 0.00 0.00
## Bay Ridge 0.00 0.00 0.00 0.00 0.00
## Bay Terrace 0.00 0.00 0.00 0.00 0.00
## Bay Terrace, Staten Island 0.00 0.00 0.00 0.00 0.00
## Baychester 0.00 0.00 0.00 0.00 0.00
## Bayside 0.00 0.00 0.00 0.00 0.00
## Bayswater 0.00 0.00 0.00 0.00 0.00
## Bedford-Stuyvesant 0.00 0.08 0.00 0.00 0.00
## Belle Harbor 0.00 0.00 0.00 0.00 0.00
## Bellerose 0.00 0.00 0.00 0.00 0.00
## Belmont 0.00 0.00 0.00 0.00 0.00
## Bensonhurst 0.00 0.00 0.00 0.00 0.00
## Bergen Beach 0.00 0.00 0.00 0.00 0.00
## Boerum Hill 0.00 0.00 0.00 0.00 0.00
## Borough Park 0.00 0.00 0.00 0.00 0.00
## Breezy Point 0.00 0.00 0.00 0.00 0.00
## Briarwood 0.00 0.00 0.00 0.00 0.00
## Brighton Beach 0.00 0.00 0.00 0.00 0.00
## Bronxdale 0.00 0.00 0.00 0.00 0.00
## Brooklyn Heights 0.00 0.00 0.00 0.00 0.00
## Brownsville 0.00 0.00 0.00 0.00 0.00
## Bull's Head 0.00 0.00 0.00 0.00 0.00
## Bushwick 0.00 0.05 0.00 0.00 0.00
## Cambria Heights 0.00 0.00 0.00 0.00 0.00
## Canarsie 0.00 0.00 0.00 0.00 0.00
## Carroll Gardens 0.00 0.00 0.00 0.00 0.00
## Castle Hill 0.00 0.00 0.00 0.00 0.00
## Castleton Corners 0.00 0.00 0.00 0.00 0.00
## Chelsea 0.00 0.00 0.02 0.00 0.00
## Chinatown 0.00 0.00 0.01 0.00 0.00
## City Island 0.00 0.00 0.00 0.00 0.00
## Civic Center 0.00 0.00 0.00 0.00 0.00
## Claremont Village 0.00 0.00 0.00 0.00 0.00
## Clason Point 0.00 0.00 0.00 0.00 0.00
## Clifton 0.00 0.00 0.00 0.00 0.00
## Clinton Hill 0.00 0.01 0.00 0.00 0.00
## Co-op City 0.00 0.00 0.00 0.00 0.00
## Cobble Hill 0.00 0.00 0.00 0.00 0.00
## College Point 0.00 0.00 0.00 0.00 0.00
## Columbia St 0.00 0.00 0.00 0.00 0.00
## Concord 0.00 0.00 0.00 0.00 0.00
## Concourse 0.00 0.00 0.00 0.00 0.00
## Concourse Village 0.00 0.00 0.00 0.00 0.00
## Coney Island 0.00 0.00 0.00 0.00 0.00
## Corona 0.00 0.00 0.00 0.00 0.00
## Crown Heights 0.00 0.03 0.00 0.00 0.00
## Cypress Hills 0.00 0.00 0.00 0.00 0.00
## Ditmars Steinway 0.00 0.00 0.00 0.01 0.00
## Dongan Hills 0.00 0.00 0.00 0.00 0.00
## Douglaston 0.00 0.00 0.00 0.00 0.00
## Downtown Brooklyn 0.00 0.00 0.00 0.00 0.00
## DUMBO 0.00 0.00 0.00 0.00 0.00
## Dyker Heights 0.00 0.00 0.00 0.00 0.00
## East Elmhurst 0.00 0.00 0.00 0.01 0.00
## East Flatbush 0.00 0.01 0.00 0.00 0.00
## East Harlem 0.00 0.00 0.02 0.00 0.00
## East Morrisania 0.00 0.00 0.00 0.00 0.00
## East New York 0.00 0.01 0.00 0.00 0.00
## East Village 0.00 0.00 0.03 0.00 0.00
## Eastchester 0.00 0.00 0.00 0.00 0.00
## Edenwald 0.00 0.00 0.00 0.00 0.00
## Edgemere 0.00 0.00 0.00 0.00 0.00
## Elmhurst 0.00 0.00 0.00 0.01 0.00
## Eltingville 0.00 0.00 0.00 0.00 0.00
## Emerson Hill 0.00 0.00 0.00 0.00 0.00
## Far Rockaway 0.00 0.00 0.00 0.00 0.00
## Fieldston 0.00 0.00 0.00 0.00 0.00
## Financial District 0.00 0.00 0.02 0.00 0.00
## Flatbush 0.00 0.01 0.00 0.00 0.00
## Flatiron District 0.00 0.00 0.00 0.00 0.00
## Flatlands 0.00 0.00 0.00 0.00 0.00
## Flushing 0.00 0.00 0.00 0.01 0.00
## Fordham 0.00 0.00 0.00 0.00 0.00
## Forest Hills 0.00 0.00 0.00 0.00 0.00
## Fort Greene 0.00 0.01 0.00 0.00 0.00
## Fort Hamilton 0.00 0.00 0.00 0.00 0.00
## Fort Wadsworth 0.00 0.00 0.00 0.00 0.00
## Fresh Meadows 0.00 0.00 0.00 0.00 0.00
## Glendale 0.00 0.00 0.00 0.00 0.00
## Gowanus 0.00 0.00 0.00 0.00 0.00
## Gramercy 0.00 0.00 0.01 0.00 0.00
## Graniteville 0.00 0.00 0.00 0.00 0.00
## Grant City 0.00 0.00 0.00 0.00 0.00
## Gravesend 0.00 0.00 0.00 0.00 0.00
## Great Kills 0.00 0.00 0.00 0.00 0.00
## Greenpoint 0.00 0.02 0.00 0.00 0.00
## Greenwich Village 0.00 0.00 0.01 0.00 0.00
## Grymes Hill 0.00 0.00 0.00 0.00 0.00
## Harlem 0.00 0.00 0.06 0.00 0.00
## Hell's Kitchen 0.00 0.00 0.05 0.00 0.00
## Highbridge 0.00 0.00 0.00 0.00 0.00
## Hollis 0.00 0.00 0.00 0.00 0.00
## Holliswood 0.00 0.00 0.00 0.00 0.00
## Howard Beach 0.00 0.00 0.00 0.00 0.00
## Howland Hook 0.00 0.00 0.00 0.00 0.00
## Huguenot 0.00 0.00 0.00 0.00 0.00
## Hunts Point 0.00 0.00 0.00 0.00 0.00
## Inwood 0.00 0.00 0.00 0.00 0.00
## Jackson Heights 0.00 0.00 0.00 0.00 0.00
## Jamaica 0.00 0.00 0.00 0.01 0.00
## Jamaica Estates 0.00 0.00 0.00 0.00 0.00
## Jamaica Hills 0.00 0.00 0.00 0.00 0.00
## Kensington 0.00 0.00 0.00 0.00 0.00
## Kew Gardens 0.00 0.00 0.00 0.00 0.00
## Kew Gardens Hills 0.00 0.00 0.00 0.00 0.00
## Kingsbridge 0.00 0.00 0.00 0.00 0.00
## Kips Bay 0.00 0.00 0.01 0.00 0.00
## Laurelton 0.00 0.00 0.00 0.00 0.00
## Lighthouse Hill 0.00 0.00 0.00 0.00 0.00
## Little Italy 0.00 0.00 0.00 0.00 0.00
## Little Neck 0.00 0.00 0.00 0.00 0.00
## Long Island City 0.00 0.00 0.00 0.01 0.00
## Longwood 0.00 0.00 0.00 0.00 0.00
## Lower East Side 0.00 0.00 0.02 0.00 0.00
## Manhattan Beach 0.00 0.00 0.00 0.00 0.00
## Marble Hill 0.00 0.00 0.00 0.00 0.00
## Mariners Harbor 0.00 0.00 0.00 0.00 0.00
## Maspeth 0.00 0.00 0.00 0.00 0.00
## Melrose 0.00 0.00 0.00 0.00 0.00
## Middle Village 0.00 0.00 0.00 0.00 0.00
## Midland Beach 0.00 0.00 0.00 0.00 0.00
## Midtown 0.00 0.00 0.03 0.00 0.00
## Midwood 0.00 0.00 0.00 0.00 0.00
## Mill Basin 0.00 0.00 0.00 0.00 0.00
## Morningside Heights 0.00 0.00 0.00 0.00 0.00
## Morris Heights 0.00 0.00 0.00 0.00 0.00
## Morris Park 0.00 0.00 0.00 0.00 0.00
## Morrisania 0.00 0.00 0.00 0.00 0.00
## Mott Haven 0.00 0.00 0.00 0.00 0.00
## Mount Eden 0.00 0.00 0.00 0.00 0.00
## Mount Hope 0.00 0.00 0.00 0.00 0.00
## Murray Hill 0.00 0.00 0.01 0.00 0.00
## Navy Yard 0.00 0.00 0.00 0.00 0.00
## Neponsit 0.00 0.00 0.00 0.00 0.00
## New Brighton 0.00 0.00 0.00 0.00 0.00
## New Dorp 0.00 0.00 0.00 0.00 0.00
## New Dorp Beach 0.00 0.00 0.00 0.00 0.00
## New Springville 0.00 0.00 0.00 0.00 0.00
## NoHo 0.00 0.00 0.00 0.00 0.00
## Nolita 0.00 0.00 0.00 0.00 0.00
## North Riverdale 0.00 0.00 0.00 0.00 0.00
## Norwood 0.00 0.00 0.00 0.00 0.00
## Oakwood 0.00 0.00 0.00 0.00 0.00
## Olinville 0.00 0.00 0.00 0.00 0.00
## Ozone Park 0.00 0.00 0.00 0.00 0.00
## Park Slope 0.00 0.01 0.00 0.00 0.00
## Parkchester 0.00 0.00 0.00 0.00 0.00
## Pelham Bay 0.00 0.00 0.00 0.00 0.00
## Pelham Gardens 0.00 0.00 0.00 0.00 0.00
## Port Morris 0.00 0.00 0.00 0.00 0.00
## Port Richmond 0.00 0.00 0.00 0.00 0.00
## Prince's Bay 0.00 0.00 0.00 0.00 0.00
## Prospect Heights 0.00 0.01 0.00 0.00 0.00
## Prospect-Lefferts Gardens 0.00 0.01 0.00 0.00 0.00
## Queens Village 0.00 0.00 0.00 0.00 0.00
## Randall Manor 0.00 0.00 0.00 0.00 0.00
## Red Hook 0.00 0.00 0.00 0.00 0.00
## Rego Park 0.00 0.00 0.00 0.00 0.00
## Richmond Hill 0.00 0.00 0.00 0.00 0.00
## Richmondtown 0.00 0.00 0.00 0.00 0.00
## Ridgewood 0.00 0.00 0.00 0.01 0.00
## Riverdale 0.00 0.00 0.00 0.00 0.00
## Rockaway Beach 0.00 0.00 0.00 0.00 0.00
## Roosevelt Island 0.00 0.00 0.00 0.00 0.00
## Rosebank 0.00 0.00 0.00 0.00 0.00
## Rosedale 0.00 0.00 0.00 0.00 0.00
## Rossville 0.00 0.00 0.00 0.00 0.00
## Schuylerville 0.00 0.00 0.00 0.00 0.00
## Sea Gate 0.00 0.00 0.00 0.00 0.00
## Sheepshead Bay 0.00 0.00 0.00 0.00 0.00
## Shore Acres 0.00 0.00 0.00 0.00 0.00
## Silver Lake 0.00 0.00 0.00 0.00 0.00
## SoHo 0.00 0.00 0.01 0.00 0.00
## Soundview 0.00 0.00 0.00 0.00 0.00
## South Beach 0.00 0.00 0.00 0.00 0.00
## South Ozone Park 0.00 0.00 0.00 0.00 0.00
## South Slope 0.00 0.01 0.00 0.00 0.00
## Springfield Gardens 0.00 0.00 0.00 0.00 0.00
## Spuyten Duyvil 0.00 0.00 0.00 0.00 0.00
## St. Albans 0.00 0.00 0.00 0.00 0.00
## St. George 0.00 0.00 0.00 0.00 0.00
## Stapleton 0.00 0.00 0.00 0.00 0.00
## Stuyvesant Town 0.00 0.00 0.00 0.00 0.00
## Sunnyside 0.00 0.00 0.00 0.01 0.00
## Sunset Park 0.00 0.01 0.00 0.00 0.00
## Theater District 0.00 0.00 0.01 0.00 0.00
## Throgs Neck 0.00 0.00 0.00 0.00 0.00
## Todt Hill 0.00 0.00 0.00 0.00 0.00
## Tompkinsville 0.00 0.00 0.00 0.00 0.00
## Tottenville 0.00 0.00 0.00 0.00 0.00
## Tremont 0.00 0.00 0.00 0.00 0.00
## Tribeca 0.00 0.00 0.00 0.00 0.00
## Two Bridges 0.00 0.00 0.00 0.00 0.00
## Unionport 0.00 0.00 0.00 0.00 0.00
## University Heights 0.00 0.00 0.00 0.00 0.00
## Upper East Side 0.00 0.00 0.03 0.00 0.00
## Upper West Side 0.00 0.00 0.03 0.00 0.00
## Van Nest 0.00 0.00 0.00 0.00 0.00
## Vinegar Hill 0.00 0.00 0.00 0.00 0.00
## Wakefield 0.00 0.00 0.00 0.00 0.00
## Washington Heights 0.00 0.00 0.02 0.00 0.00
## West Brighton 0.00 0.00 0.00 0.00 0.00
## West Farms 0.00 0.00 0.00 0.00 0.00
## West Village 0.00 0.00 0.01 0.00 0.00
## Westchester Square 0.00 0.00 0.00 0.00 0.00
## Westerleigh 0.00 0.00 0.00 0.00 0.00
## Whitestone 0.00 0.00 0.00 0.00 0.00
## Williamsbridge 0.00 0.00 0.00 0.00 0.00
## Williamsburg 0.00 0.07 0.00 0.00 0.00
## Willowbrook 0.00 0.00 0.00 0.00 0.00
## Windsor Terrace 0.00 0.00 0.00 0.00 0.00
## Woodhaven 0.00 0.00 0.00 0.00 0.00
## Woodlawn 0.00 0.00 0.00 0.00 0.00
## Woodrow 0.00 0.00 0.00 0.00 0.00
## Woodside 0.00 0.00 0.00 0.01 0.00
## Total 0.03 0.39 0.43 0.14 0.01
## Total
## 0.00
## 0.00
## 0.00
## 0.00
## 0.02
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.08
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.05
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.02
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.03
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.01
## 0.02
## 0.00
## 0.01
## 0.03
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.02
## 0.01
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.02
## 0.01
## 0.00
## 0.06
## 0.05
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.02
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.03
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 0.01
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.03
## 0.03
## 0.00
## 0.00
## 0.00
## 0.02
## 0.00
## 0.00
## 0.01
## 0.00
## 0.00
## 0.00
## 0.00
## 0.07
## 0.00
## 0.00
## 0.00
## 0.00
## 0.00
## 0.01
## 1.00
# Correlation table
# list_history contains NA (it is number_of_reviews / reviews_per_month,
# computed before the NA reviews_per_month values are replaced by 0). With
# the default use = "everything" a single NA turns the entire list_history
# row and column into NA, so correlate on pairwise-complete observations.
fp %>%
  select_if(is.numeric) %>% # keep just the numeric variables
  cor(use = "pairwise.complete.obs") %>%
  round(2) %>%
  kable()
| latitude | longitude | price | minimum_nights | number_of_reviews | reviews_per_month | calculated_host_listings_count | availability_365 | list_history | min_spend | |
|---|---|---|---|---|---|---|---|---|---|---|
| latitude | 1.00 | 0.08 | 0.03 | 0.04 | -0.01 | -0.02 | 0.03 | 0.00 | NA | 0.01 |
| longitude | 0.08 | 1.00 | -0.16 | -0.08 | 0.03 | 0.12 | -0.15 | 0.03 | NA | -0.05 |
| price | 0.03 | -0.16 | 1.00 | 0.04 | -0.07 | -0.08 | 0.06 | 0.07 | NA | 0.46 |
| minimum_nights | 0.04 | -0.08 | 0.04 | 1.00 | -0.12 | -0.18 | 0.12 | 0.13 | NA | 0.39 |
| number_of_reviews | -0.01 | 0.03 | -0.07 | -0.12 | 1.00 | 0.54 | -0.12 | 0.01 | NA | -0.05 |
| reviews_per_month | -0.02 | 0.12 | -0.08 | -0.18 | 0.54 | 1.00 | -0.11 | -0.09 | NA | -0.07 |
| calculated_host_listings_count | 0.03 | -0.15 | 0.06 | 0.12 | -0.12 | -0.11 | 1.00 | 0.19 | NA | 0.06 |
| availability_365 | 0.00 | 0.03 | 0.07 | 0.13 | 0.01 | -0.09 | 0.19 | 1.00 | NA | 0.06 |
| list_history | NA | NA | NA | NA | NA | NA | NA | NA | 1 | NA |
| min_spend | 0.01 | -0.05 | 0.46 | 0.39 | -0.05 | -0.07 | 0.06 | 0.06 | NA | 1.00 |
# Plot the output of inspect_cor() for the listings data
show_plot(inspect_cor(fp))
# neighbourhood_group and room_type: listing counts, bars side by side
grid.arrange(
  ggplot(fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "dodge"),
  ncol = 1
)
# Same breakdown with horizontal bars, stacked in one column:
# counts side by side first, then the within-borough proportions.
grid.arrange(
  ggplot(fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "dodge") +
    coord_flip(),
  ggplot(fp, aes(x = neighbourhood_group, fill = room_type)) +
    geom_bar(position = "fill") +
    coord_flip(),
  ncol = 1
)
# Heat map of the number of listings per borough / room type combination
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(count = n()) %>%
  ggplot(aes(x = neighbourhood_group, y = room_type, fill = count)) +
  geom_tile()
# Scatter plots for pairs of numeric listing variables.

# price vs. minimum_nights
ggplot(fp, aes(x = price, y = minimum_nights)) +
  geom_point()

# price vs. number_of_reviews
ggplot(fp, aes(x = price, y = number_of_reviews)) +
  geom_point()

# price vs. reviews_per_month
ggplot(fp, aes(x = price, y = reviews_per_month)) +
  geom_point()

# price vs. availability_365
ggplot(fp, aes(x = price, y = availability_365)) +
  geom_point()

# calculated_host_listings_count vs. availability_365
ggplot(fp, aes(x = calculated_host_listings_count, y = availability_365)) +
  geom_point()

# calculated_host_listings_count vs. minimum_nights
ggplot(fp, aes(x = calculated_host_listings_count, y = minimum_nights)) +
  geom_point()

# longitude vs. latitude
ggplot(fp, aes(x = longitude, y = latitude)) +
  geom_point()
# Pairs plot of the two categorical variables and six numeric variables
ggpairs(
  select(
    fp,
    neighbourhood_group,
    room_type,
    price,
    minimum_nights,
    number_of_reviews,
    reviews_per_month,
    calculated_host_listings_count,
    availability_365
  )
)
# Correlation plot from DataExplorer, called with maxcat = 5L
plot_correlation(fp, maxcat = 5L)
# Scatter plots coloured by neighbourhood_group, each overlaid with one
# straight-line (lm) fit per group and no confidence band.

# minimum_nights vs. price
ggplot(
  fp,
  aes(x = minimum_nights, y = price, color = neighbourhood_group)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# availability_365 vs. price
ggplot(
  fp,
  aes(x = availability_365, y = price, color = neighbourhood_group)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# reviews_per_month vs. number_of_reviews
# NOTE(review): the heading for this plot used to mention price, but the
# y-axis is number_of_reviews -- confirm which one was intended.
ggplot(
  fp,
  aes(x = reviews_per_month, y = number_of_reviews, color = neighbourhood_group)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# availability_365 vs. calculated_host_listings_count
ggplot(
  fp,
  aes(
    x = availability_365,
    y = calculated_host_listings_count,
    color = neighbourhood_group
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Boxplots of four numeric variables, each split by borough (left column)
# and by room type (right column).
grid.arrange(
  # price by neighbourhood_group
  ggplot(fp, aes(x = neighbourhood_group, y = price)) +
    geom_boxplot(),
  # price by room_type
  ggplot(fp, aes(x = room_type, y = price)) +
    geom_boxplot(),
  # minimum_nights by neighbourhood_group
  ggplot(fp, aes(x = neighbourhood_group, y = minimum_nights)) +
    geom_boxplot(),
  # minimum_nights by room_type
  ggplot(fp, aes(x = room_type, y = minimum_nights)) +
    geom_boxplot(),
  # number_of_reviews by neighbourhood_group
  ggplot(fp, aes(x = neighbourhood_group, y = number_of_reviews)) +
    geom_boxplot(),
  # number_of_reviews by room_type
  ggplot(fp, aes(x = room_type, y = number_of_reviews)) +
    geom_boxplot(),
  # reviews_per_month by neighbourhood_group
  ggplot(fp, aes(x = neighbourhood_group, y = reviews_per_month)) +
    geom_boxplot(),
  # reviews_per_month by room_type
  ggplot(fp, aes(x = room_type, y = reviews_per_month)) +
    geom_boxplot(),
  ncol = 2
)
# Heat maps of per-cell medians for every borough / room type combination.

# Median price
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_price = median(price)) %>%
  ggplot(aes(x = neighbourhood_group, y = room_type, fill = med_price)) +
  geom_tile()

# Median minimum_nights
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_nights = median(minimum_nights)) %>%
  ggplot(aes(x = neighbourhood_group, y = room_type, fill = med_nights)) +
  geom_tile()

# Median number_of_reviews
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_reviews = median(number_of_reviews)) %>%
  ggplot(aes(x = neighbourhood_group, y = room_type, fill = med_reviews)) +
  geom_tile()

# Median reviews_per_month
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_month_reviews = median(reviews_per_month)) %>%
  ggplot(
    aes(x = neighbourhood_group, y = room_type, fill = med_month_reviews)
  ) +
  geom_tile()

# Median calculated_host_listings_count
fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_lists = median(calculated_host_listings_count)) %>%
  ggplot(aes(x = neighbourhood_group, y = room_type, fill = med_lists)) +
  geom_tile()
What is the price distribution?
# Density-scaled histogram of price on a log10 x-axis, with a density curve,
# a dotted vertical line at the mean price and a text label giving that mean.
fp %>%
  ggplot(aes(x = price)) +
  geom_histogram(mapping = aes(y = ..density..), bins = 30) +
  geom_density(alpha = 0.5) +
  geom_vline(xintercept = mean(fp$price), size = 2, linetype = 3) +
  annotate(
    "text",
    x = 1800,
    y = 0.75,
    label = paste("Mean price = ", round(mean(fp$price), 2)),
    color = "#32CD32",
    size = 8
  ) +
  scale_x_log10(breaks = seq(0, 10000, 100))
# Mean price per borough (two decimals), used to annotate the facets below
neighbor_mean <- summarise(
  group_by(fp, neighbourhood_group),
  price = round(mean(price), 2)
)

# Density-scaled price histogram on a log10 x-axis, one facet per borough,
# each with a dotted line and a text label at that borough's mean price.
fp %>%
  ggplot(aes(x = price)) +
  geom_histogram(mapping = aes(y = ..density..), bins = 30) +
  geom_density(alpha = 0.2) +
  ggtitle(
    "Transformed distribution of price\n by neighbourhood groups",
    subtitle = expression("With" ~'log'[10] ~ "transformation of x-axis")
  ) +
  geom_vline(
    data = neighbor_mean,
    mapping = aes(xintercept = price),
    size = 2,
    linetype = 3
  ) +
  geom_text(
    data = neighbor_mean,
    mapping = aes(x = price + 1400, label = paste("Mean = ",price)),
    y = 1.5,
    color = "darkgreen",
    size = 4
  ) +
  facet_wrap(~neighbourhood_group) +
  scale_x_log10()
# Count the listings priced at or above the overall mean for each
# borough / room type pair and show them as bars stacked by room type.
fp %>%
  filter(price >= mean(price)) %>%
  group_by(neighbourhood_group, room_type) %>%
  tally() %>%
  ggplot(
    aes(
      x = reorder(neighbourhood_group, desc(n)),
      y = n,
      fill = room_type
    )
  ) +
  geom_bar(stat = "identity") +
  xlab(NULL) +
  ylab(NULL) +
  ggtitle(
    "Number of above average price listings",
    subtitle = "Most of them are entire homes or apartments"
  )
More interested in room_type
# Boxplots of price for each room type on a log10 y-axis, with a dashed
# purple reference line at the overall mean price.
# The former group_by(neighbourhood_group) step was dropped: ggplot2 does not
# use dplyr grouping, so it had no effect on the plot and only suggested a
# per-borough breakdown that is not drawn here.
fp %>%
  ggplot(aes(x = room_type, y = price)) +
  geom_boxplot(aes(fill = room_type)) +
  scale_y_log10() +
  xlab("Room type") +
  ylab("Price") +
  ggtitle("Boxplots of price by room type",
    subtitle = "Entire homes and apartments have the highest avg price") +
  geom_hline(yintercept = mean(fp$price), color = "purple", linetype = 2)
What does it look like when using medians by room_type and neighbourhood_group?
# Median price (and listing count) for each borough / room type pair,
# drawn as side-by-side bars per borough.
med_price <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_price = median(price), n = n())

ggplot(
  med_price,
  aes(x = neighbourhood_group, y = med_price, fill = paste(room_type))
) +
  geom_bar(stat = "identity", position = "dodge")
# Median of minimum_nights by room type
# Uses median() so the statistic matches the med_night name and the medians
# shown in the neighbouring charts; the earlier mean() did not.
med_nights <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_night = median(minimum_nights), n = n())
med_nights %>%
  ggplot(aes(x = neighbourhood_group, y = med_night, fill = paste(room_type))) +
  geom_bar(stat = "identity", position = "dodge")
# Median of minimum cost (price * minimum_nights) by room type
med_min_spend <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(med_cost = median(price * minimum_nights), n = n())

# Build the chart as an object so that exactly this plot is displayed and
# saved. Adding ggsave() to the plot with `+` evaluates ggsave() before the
# chart exists, so it writes last_plot() (the previous chart) on older
# ggplot2 versions and raises an error on current ones.
min_spend_plot <- med_min_spend %>%
  ggplot(aes(x = reorder(neighbourhood_group, -med_cost), y = med_cost, fill = paste(room_type))) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x ="", y="", title = "Entire Room listings of Manhattan's minimum spending is outstanding", subtitle = "About double as Brooklyn's entire home", caption = "From Technical Appendix") +
  theme_classic() +
  theme(legend.title = element_blank(),
    legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 900, 200), labels = scales::dollar) +
  scale_fill_discrete_qualitative(palette = "Cold")
min_spend_plot
ggsave(filename = "min_spend.png", plot = min_spend_plot)
## Saving 7 x 5 in image
Let’s see it on the map
# Hexagonal-bin density of listing coordinates, one panel per room type,
# filled on a yellow-to-red gradient with the legend placed underneath.
ggplot(fp, aes(x = longitude, y = latitude)) +
  geom_hex() +
  scale_fill_gradient(low = "yellow", high = "red", breaks = c(500, 1000)) +
  labs(x = "Longitude", y = "Latitude") +
  facet_wrap(~room_type) +
  theme(legend.position = "bottom")
Median price by neighbourhood
# Median price of each neighbourhood, stored as med_price_neighbour
neighbour_median <- fp %>%
  group_by(neighbourhood) %>%
  summarise(med_price_neighbour = median(price))
# Attach the neighbourhood median to every listing
fp_subset <- merge(x = fp, y = neighbour_median)
# Which Staten Island neighbourhood has a listing priced at exactly 800?
with(staten, neighbourhood[price == 800])
## [1] Fort Wadsworth
## 221 Levels: Allerton Arden Heights Arrochar Arverne Astoria ... Woodside
# Leave Fort Wadsworth out of the mapped data
# NOTE(review): presumably so its median does not stretch the colour scale --
# confirm the reason for the exclusion.
fp_subset <- fp_subset %>%
  subset(neighbourhood != "Fort Wadsworth")
# Google terrain basemap centred on (-73.95, 40.72) with every listing in
# fp_subset drawn as a small point coloured by its neighbourhood's median price.
save_p_1 <- ggmap(
  get_googlemap(
    center = c(lon = -73.95, lat = 40.72),
    zoom = 11,
    scale = 4,
    maptype = "terrain",
    color = "color"
  )
) +
  geom_point(
    data = fp_subset,
    mapping = aes(x = longitude, y = latitude, colour = med_price_neighbour),
    size = 0.1
  ) +
  scale_colour_gradientn(colours = rainbow(3)) +
  labs(
    x = "",
    y = "",
    title = "Lower Manhattan has the highest central tendency of price ",
    subtitle = "Surprisingly Upper East/West side and Brooklyn seem to be similar in price range",
    caption = "From Technical Appendix",
    col = "Price Range in USD"
  ) +
  # Hide coordinate labels and tick marks on the map
  theme(axis.text = element_blank(), axis.ticks = element_blank())
## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.72,-73.95&zoom=11&size=640x640&scale=4&maptype=terrain&key=xxx
# Display the map
print(save_p_1)
## Warning: Removed 137 rows containing missing values (geom_point).
# Write the map to geo.png
ggsave("geo.png", save_p_1)
## Saving 7 x 5 in image
## Warning: Removed 137 rows containing missing values (geom_point).
# Numeric "Blues" colour palette spanning the neighbourhood median prices.
# med_price_neighbour exists only on fp_subset (it is added by merging fp
# with neighbour_median); fp has no such column, so fp$med_price_neighbour
# evaluated to NULL and left the palette without a domain.
pal <- colorNumeric(
  palette = "Blues",
  domain = fp_subset$med_price_neighbour)
## Warning: Unknown or uninitialised column: 'med_price_neighbour'.
# Interactive map: clustered markers for every listing, labelled with the
# HTML-escaped listing name and a popup showing price, room type, minimum
# stay, minimum cost and reviews per month.
leaflet(options = leafletOptions(minZoom = 0, maxZoom = 18)) %>%
  setView(lng = -73.95, lat = 40.72, zoom = 12) %>%
  addMarkers(
    data = fp,
    lng = ~longitude,
    lat = ~latitude,
    label = ~htmlEscape(name),
    popup = paste(
      "Price per night: $", fp$price, "<br>",
      "Room Type:", fp$room_type, "<br>",
      "Minimum Stay:", fp$minimum_nights, "days<br>",
      "Minimum Cost for stay: $", fp$minimum_nights * fp$price, "<br>",
      "Monthly Average Number of Reviews: ", fp$reviews_per_month
    ),
    clusterOptions = markerClusterOptions()
  ) %>%
  addTiles()
Does room_type impact the price?
# Two-sample t-test of price: entire homes/apartments vs. private rooms
(t <- t.test(
  fp$price[fp$room_type == "Entire home/apt"],
  fp$price[fp$room_type == "Private room"],
  conf.level = 0.95
))
##
## Welch Two Sample t-test
##
## data: fp$price[fp$room_type == "Entire home/apt"] and fp$price[fp$room_type == "Private room"]
## t = 47.672, df = 27227, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 125.2677 136.0103
## sample estimates:
## mean of x mean of y
## 224.614 93.975
# Two-sample t-test of price: entire homes/apartments vs. shared rooms
(t <- t.test(
  fp$price[fp$room_type == "Entire home/apt"],
  fp$price[fp$room_type == "Shared room"],
  conf.level = 0.95
))
##
## Welch Two Sample t-test
##
## data: fp$price[fp$room_type == "Entire home/apt"] and fp$price[fp$room_type == "Shared room"]
## t = 39.039, df = 1870.9, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 150.5066 166.4289
## sample estimates:
## mean of x mean of y
## 224.61396 66.14617
# Two-sample t-test of price: private rooms vs. shared rooms
(t <- t.test(
  fp$price[fp$room_type == "Private room"],
  fp$price[fp$room_type == "Shared room"],
  conf.level = 0.95
))
##
## Welch Two Sample t-test
##
## data: fp$price[fp$room_type == "Private room"] and fp$price[fp$room_type == "Shared room"]
## t = 7.6441, df = 1222.3, p-value = 4.238e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 20.68636 34.97129
## sample estimates:
## mean of x mean of y
## 93.97500 66.14617
# Standard-normal quantile for a two-sided 95 percent interval
z <- qnorm(0.975)

# Mean price per room type as bars, with error bars of half-width
# z * sd / sqrt(n) around each mean.
fp %>%
  group_by(room_type) %>%
  summarise(
    mn = mean(price),
    sd = sd(price),
    n = n(),
    ci = z * sd / sqrt(n)
  ) %>%
  ggplot(mapping = aes(x = room_type, y = mn)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(
    mapping = aes(ymin = mn - ci, ymax = mn + ci),
    width = 0.5,
    position = position_dodge(0.9)
  ) +
  labs(title = "Price difference by Room type with error bar")
# Pearson chi-squared test on the neighbourhood_group x room_type
# contingency table of listing counts
chisq.test(table(fp$neighbourhood_group, fp$room_type))
##
## Pearson's Chi-squared test
##
## data: table(fp$neighbourhood_group, fp$room_type)
## X-squared = 1169.7, df = 8, p-value < 2.2e-16
How does the share of listings vary by neighbourhood_group and room_type?
# Listing counts for every borough / room type pair
C_P_n <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(n = n())

# Simultaneous multinomial confidence intervals for those counts (alpha = 0.05)
C_P_n_ci <- multinomialCI(x = t(C_P_n[, 3]), alpha = 0.05)

# Each pair's share of all listings, rounded to three decimals
C_P_tab <- fp %>%
  group_by(neighbourhood_group, room_type) %>%
  summarise(prop = round(n() / nrow(fp), 3))

# Attach the lower and upper interval bounds; both tables use the same
# grouping, so their rows line up.
C_P_tab$ci_l <- round(C_P_n_ci[, 1], digits = 3)
C_P_tab$ci_u <- round(C_P_n_ci[, 2], digits = 3)

htmlTable(C_P_tab)
| neighbourhood_group | room_type | prop | ci_l | ci_u | |
|---|---|---|---|---|---|
| 1 | Bronx | Entire home/apt | 0.01 | 0.005 | 0.016 |
| 2 | Bronx | Private room | 0.017 | 0.012 | 0.023 |
| 3 | Bronx | Shared room | 0.002 | 0 | 0.007 |
| 4 | Brooklyn | Entire home/apt | 0.194 | 0.188 | 0.199 |
| 5 | Brooklyn | Private room | 0.187 | 0.182 | 0.193 |
| 6 | Brooklyn | Shared room | 0.01 | 0.004 | 0.015 |
| 7 | Manhattan | Entire home/apt | 0.269 | 0.264 | 0.275 |
| 8 | Manhattan | Private room | 0.153 | 0.147 | 0.158 |
| 9 | Manhattan | Shared room | 0.011 | 0.005 | 0.016 |
| 10 | Queens | Entire home/apt | 0.049 | 0.044 | 0.055 |
| 11 | Queens | Private room | 0.082 | 0.077 | 0.088 |
| 12 | Queens | Shared room | 0.005 | 0 | 0.011 |
| 13 | Staten Island | Entire home/apt | 0.005 | 0 | 0.01 |
| 14 | Staten Island | Private room | 0.005 | 0 | 0.011 |
| 15 | Staten Island | Shared room | 0 | 0 | 0.006 |
# Bar chart of listing shares by room type and borough, with the
# confidence interval of each share drawn as an error bar.
ggplot(C_P_tab, aes(x = room_type, y = prop, fill = neighbourhood_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  # vjust lifts the value labels clear of the error bars
  geom_text(
    mapping = aes(label = round(prop, 2)),
    vjust = -4,
    color = "black",
    position = position_dodge(0.9),
    size = 4
  ) +
  geom_errorbar(
    mapping = aes(ymin = ci_l, ymax = ci_u),
    width = 0.4,
    position = position_dodge(0.9)
  )
Significance of Correlation
# Correlations, pair counts and p-values for all numeric columns
rcorr(as.matrix(select_if(fp, is.numeric)))
## latitude longitude price minimum_nights
## latitude 1.00 0.08 0.03 0.04
## longitude 0.08 1.00 -0.16 -0.08
## price 0.03 -0.16 1.00 0.04
## minimum_nights 0.04 -0.08 0.04 1.00
## number_of_reviews -0.01 0.03 -0.07 -0.12
## reviews_per_month -0.02 0.12 -0.08 -0.18
## calculated_host_listings_count 0.03 -0.15 0.06 0.12
## availability_365 0.00 0.03 0.07 0.13
## list_history 0.01 -0.08 0.00 0.07
## min_spend 0.01 -0.05 0.46 0.39
## number_of_reviews reviews_per_month
## latitude -0.01 -0.02
## longitude 0.03 0.12
## price -0.07 -0.08
## minimum_nights -0.12 -0.18
## number_of_reviews 1.00 0.54
## reviews_per_month 0.54 1.00
## calculated_host_listings_count -0.12 -0.11
## availability_365 0.01 -0.09
## list_history 0.50 -0.20
## min_spend -0.05 -0.07
## calculated_host_listings_count
## latitude 0.03
## longitude -0.15
## price 0.06
## minimum_nights 0.12
## number_of_reviews -0.12
## reviews_per_month -0.11
## calculated_host_listings_count 1.00
## availability_365 0.19
## list_history -0.10
## min_spend 0.06
## availability_365 list_history min_spend
## latitude 0.00 0.01 0.01
## longitude 0.03 -0.08 -0.05
## price 0.07 0.00 0.46
## minimum_nights 0.13 0.07 0.39
## number_of_reviews 0.01 0.50 -0.05
## reviews_per_month -0.09 -0.20 -0.07
## calculated_host_listings_count 0.19 -0.10 0.06
## availability_365 1.00 0.14 0.06
## list_history 0.14 1.00 0.03
## min_spend 0.06 0.03 1.00
##
## n
## latitude longitude price minimum_nights
## latitude 31354 31354 31354 31354
## longitude 31354 31354 31354 31354
## price 31354 31354 31354 31354
## minimum_nights 31354 31354 31354 31354
## number_of_reviews 31354 31354 31354 31354
## reviews_per_month 31354 31354 31354 31354
## calculated_host_listings_count 31354 31354 31354 31354
## availability_365 31354 31354 31354 31354
## list_history 26147 26147 26147 26147
## min_spend 31354 31354 31354 31354
## number_of_reviews reviews_per_month
## latitude 31354 31354
## longitude 31354 31354
## price 31354 31354
## minimum_nights 31354 31354
## number_of_reviews 31354 31354
## reviews_per_month 31354 31354
## calculated_host_listings_count 31354 31354
## availability_365 31354 31354
## list_history 26147 26147
## min_spend 31354 31354
## calculated_host_listings_count
## latitude 31354
## longitude 31354
## price 31354
## minimum_nights 31354
## number_of_reviews 31354
## reviews_per_month 31354
## calculated_host_listings_count 31354
## availability_365 31354
## list_history 26147
## min_spend 31354
## availability_365 list_history min_spend
## latitude 31354 26147 31354
## longitude 31354 26147 31354
## price 31354 26147 31354
## minimum_nights 31354 26147 31354
## number_of_reviews 31354 26147 31354
## reviews_per_month 31354 26147 31354
## calculated_host_listings_count 31354 26147 31354
## availability_365 31354 26147 31354
## list_history 26147 26147 26147
## min_spend 31354 26147 31354
##
## P
## latitude longitude price minimum_nights
## latitude 0.0000 0.0000 0.0000
## longitude 0.0000 0.0000 0.0000
## price 0.0000 0.0000 0.0000
## minimum_nights 0.0000 0.0000 0.0000
## number_of_reviews 0.0172 0.0000 0.0000 0.0000
## reviews_per_month 0.0014 0.0000 0.0000 0.0000
## calculated_host_listings_count 0.0000 0.0000 0.0000 0.0000
## availability_365 0.5140 0.0000 0.0000 0.0000
## list_history 0.3137 0.0000 0.6345 0.0000
## min_spend 0.0113 0.0000 0.0000 0.0000
## number_of_reviews reviews_per_month
## latitude 0.0172 0.0014
## longitude 0.0000 0.0000
## price 0.0000 0.0000
## minimum_nights 0.0000 0.0000
## number_of_reviews 0.0000
## reviews_per_month 0.0000
## calculated_host_listings_count 0.0000 0.0000
## availability_365 0.0815 0.0000
## list_history 0.0000 0.0000
## min_spend 0.0000 0.0000
## calculated_host_listings_count
## latitude 0.0000
## longitude 0.0000
## price 0.0000
## minimum_nights 0.0000
## number_of_reviews 0.0000
## reviews_per_month 0.0000
## calculated_host_listings_count
## availability_365 0.0000
## list_history 0.0000
## min_spend 0.0000
## availability_365 list_history min_spend
## latitude 0.5140 0.3137 0.0113
## longitude 0.0000 0.0000 0.0000
## price 0.0000 0.6345 0.0000
## minimum_nights 0.0000 0.0000 0.0000
## number_of_reviews 0.0815 0.0000 0.0000
## reviews_per_month 0.0000 0.0000 0.0000
## calculated_host_listings_count 0.0000 0.0000 0.0000
## availability_365 0.0000 0.0000
## list_history 0.0000 0.0000
## min_spend 0.0000 0.0000
Multiple linear regression
# Multiple linear regression of price on borough, room type, minimum stay,
# number of reviews and availability.
# price is a continuous outcome, so it is fitted by ordinary least squares.
# The earlier glm(factor(price) ~ ..., family = binomial) treated the first
# factor level (the lowest price) as "failure" and every other price as
# "success", which is why fitted probabilities sat at 0 or 1 and no
# coefficient said anything about price itself.
mod <- lm(price ~ neighbourhood_group + room_type + minimum_nights + number_of_reviews + availability_365,
  data = fp)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Review output: coefficient table and fit statistics of the model
summary(mod)
##
## Call:
## glm(formula = factor(price) ~ neighbourhood_group + room_type +
## minimum_nights + number_of_reviews + availability_365, family = binomial(link = "logit"),
## data = fp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1745 0.0155 0.0198 0.0255 0.0731
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.912e+00 1.176e+00 5.027 4.98e-07 ***
## neighbourhood_groupBrooklyn 1.653e+00 1.166e+00 1.418 0.156
## neighbourhood_groupManhattan 2.148e+00 1.247e+00 1.722 0.085 .
## neighbourhood_groupQueens 4.886e-01 1.156e+00 0.423 0.673
## neighbourhood_groupStaten Island 1.570e+01 2.609e+03 0.006 0.995
## room_typePrivate room 3.197e-01 6.877e-01 0.465 0.642
## room_typeShared room 1.466e+01 1.568e+03 0.009 0.993
## minimum_nights 8.820e-03 3.522e-02 0.250 0.802
## number_of_reviews 4.475e-03 8.954e-03 0.500 0.617
## availability_365 2.830e-03 2.904e-03 0.974 0.330
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 164.80 on 31353 degrees of freedom
## Residual deviance: 158.05 on 31344 degrees of freedom
## AIC: 178.05
##
## Number of Fisher Scoring iterations: 21
Residuals of the model
# One plot per device page
par(mfrow = c(1, 1))

# Model residuals against the response and against listing variables.
# Residuals vs. price
plot(x = fp$price, y = mod$residuals)
# Residuals vs. neighbourhood_group
plot(x = fp$neighbourhood_group, y = mod$residuals)
# Residuals vs. neighbourhood
plot(x = fp$neighbourhood, y = mod$residuals)
# Residuals vs. room_type
plot(x = fp$room_type, y = mod$residuals)
# Residuals vs. minimum_nights
plot(x = fp$minimum_nights, y = mod$residuals)
# Residuals vs. number_of_reviews
plot(x = fp$number_of_reviews, y = mod$residuals)
Visualization of Multiple Regression
# Coefficient table of the fitted model (estimates and related statistics)
coe <- coef(summary(mod))
# Bind the confidence intervals onto the coefficient table, leaving out the
# intercept row from both
coe_CI <- cbind(coe[-1, ], confint(mod)[-1, ]) %>%
  as.data.frame()
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Short column names; downstream code refers to pval, low_CI and high_CI
colnames(coe_CI) <- c("estimate", "se", "t", "pval", "low_CI", "high_CI")
# Order based on p-value (ascending), rounded to 3 decimals for display
by_pval <- order(coe_CI$pval)
htmlTable(round(coe_CI[by_pval, ], 3))
| | estimate | se | t | pval | low_CI | high_CI |
|---|---|---|---|---|---|---|
| neighbourhood_groupManhattan | 2.148 | 1.247 | 1.722 | 0.085 | -0.947 | 4.534 |
| neighbourhood_groupBrooklyn | 1.653 | 1.166 | 1.418 | 0.156 | -1.366 | 3.733 |
| availability_365 | 0.003 | 0.003 | 0.974 | 0.33 | -0.003 | 0.009 |
| number_of_reviews | 0.004 | 0.009 | 0.5 | 0.617 | -0.008 | 0.028 |
| room_typePrivate room | 0.32 | 0.688 | 0.465 | 0.642 | -1.042 | 1.744 |
| neighbourhood_groupQueens | 0.489 | 1.156 | 0.423 | 0.673 | -2.519 | 2.547 |
| minimum_nights | 0.009 | 0.035 | 0.25 | 0.802 | -0.006 | 0.117 |
| room_typeShared room | 14.662 | 1567.654 | 0.009 | 0.993 | -127.713 | |
| neighbourhood_groupStaten Island | 15.696 | 2608.514 | 0.006 | 0.995 | -221.21 | |
# Dot plot of the coefficient estimates; the y axis lists the variables
# reordered by descending p-value.
# The x limits are taken from the CI bounds as-is: if high_CI contains NA,
# max() is NA and ggplot2 then picks that limit from the data.
g1 <- ggplot(
  data = coe_CI,
  mapping = aes(x = estimate, y = reorder(row.names(coe_CI), desc(pval)))
) +
  geom_point(size = 3) +
  xlim(min(coe_CI$low_CI), max(coe_CI$high_CI)) +
  labs(x = "Coefficient", y = "Variable") +
  theme_bw()
g1
# Add confidence intervals to g1: a red reference line at zero plus one blue
# segment from each estimate to its upper bound and one to its lower bound.
# The two halves are drawn separately so a row with a missing bound still
# shows the half that exists.
# The closing parenthesis now wraps the whole chain, so g2 stores the complete
# plot (segments and axis label included) and is printed once. Previously the
# assignment ended after geom_vline() and g2 lacked the intervals.
(g2 <- g1 +
  geom_vline(xintercept = 0, color = "red") +
  geom_segment(
    aes(xend = high_CI, yend = reorder(row.names(coe_CI), desc(pval))),
    color = "Blue"
  ) +
  geom_segment(
    aes(xend = low_CI, yend = reorder(row.names(coe_CI), desc(pval))),
    color = "Blue"
  ) +
  xlab("Coefficient with Confidence Interval")
)
## Warning: Removed 2 rows containing missing values (geom_segment).